This document shows the supplementary material of the article entitled “The Integrated nested Laplace approximation for fitting Dirichlet regression models’’. In particular, we show all the results obtained in the simulation scenarios (1,2 and 3) presented in the paper.
Thi simulation is based on a Dirichlet regression with four categories and one parameter per category, the intercept, that is: \[\begin{align} \label{eq:dirichlet_example1} {\boldsymbol{Y}}_n & \sim \text{Dirichlet}(\alpha_{1n}, \ldots, \alpha_{4n}) \,, n = 1, \ldots, N, \nonumber \\ \log(\alpha_{1n}) & = \beta_{01}, \nonumber \\ \log(\alpha_{2n}) & = \beta_{02}, \nonumber \\ \log(\alpha_{3n}) & = \beta_{03}, \\ \log(\alpha_{4n}) & = \beta_{04}. \nonumber \end{align}\] {Five different datasets of sizes \(N= 50, 100\), \(500, 1000, 10000\) }with this structure were simulated letting \(\beta_{0c}, c = 1,\ldots,4\) to be \(-2.4\), \(1.2\), \(-3.1\) and \(1.3\), respectively. We used vague prior distributions for the {latent field ({\(\beta_{0c}, c = 1,\ldots,4\)})}. In particular \(p(x_m) \sim\) \(\mathcal{N}(0, \tau = 0.0001)\). As the response values are not close to 0 and 1, no transformation was needed.
results1 <- readRDS(file = "simulation1/simulation1_50-500.RDS")
results2 <- readRDS(file = "simulation1/simulation1_1000-10000.RDS")
results <- c(results1, results2)
res_ratios <- readRDS("simulation1/simulation1_ratios_jags.RDS")
#Computational times
result_time <- rbind(results$n50$times,
results$n100$times,
results$n500$times,
results$n1000$times,
results$n10000$times)
colnames(result_time) <- c("R-JAGS", "dirinla", "long R-JAGS")
rownames(result_time) <- paste0( c(50, 100, 500, 1000, 10000))
as.data.frame(result_time)
N <- c(50, 100, 500, 1000, 10000)
for(i in N){
cat("\n")
cat(sprintf(paste0("### N = ", i, "\n")))
cat(paste0("{width=100%}"))
cat("\n")
}
### ratios dirinla
result_ratio1 <- cbind(rbind(round(results$n50$ratio1_intercept, 4),
round(results$n100$ratio1_intercept, 4),
round(results$n500$ratio1_intercept, 4),
round(results$n1000$ratio1_intercept, 4),
round(results$n10000$ratio1_intercept, 4)),
rbind(round(results$n50$ratio1_mu, 4),
round(results$n100$ratio1_mu, 4),
round(results$n500$ratio1_mu, 4),
round(results$n1000$ratio1_mu, 4),
round(results$n10000$ratio1_mu, 4)))
colnames(result_ratio1) <- c(paste0("beta0", 1:4), paste0("mu", 1:4))
rownames(result_ratio1) <- paste0( c(50, 100, 500, 1000, 10000))
result_ratio2 <- cbind(rbind(round(sqrt(results$n50$ratio2_intercept), 4),
round(sqrt(results$n100$ratio2_intercept), 4),
round(sqrt(results$n500$ratio2_intercept), 4),
round(sqrt(results$n1000$ratio2_intercept), 4),
round(sqrt(results$n10000$ratio2_intercept), 4)),
rbind(round(sqrt(results$n50$ratio2_mu), 4),
round(sqrt(results$n100$ratio2_mu), 4),
round(sqrt(results$n500$ratio2_mu), 4),
round(sqrt(results$n1000$ratio2_mu), 4),
round(sqrt(results$n10000$ratio2_mu), 4)))
colnames(result_ratio2) <- c(paste0("beta0", 1:4), paste0("mu", 1:4))
rownames(result_ratio2) <- paste0( c(50, 100, 500, 1000, 10000))
### ratios short jags
result_ratio1_jags <- cbind(rbind(round(res_ratios$n50$ratio1_beta0_jags, 4),
round(res_ratios$n100$ratio1_beta0_jags, 4),
round(res_ratios$n500$ratio1_beta0_jags, 4),
round(res_ratios$n1000$ratio1_beta0_jags, 4),
round(res_ratios$n10000$ratio1_beta0_jags, 4)),
rbind(round(res_ratios$n50$ratio1_mu_jags, 4),
round(res_ratios$n100$ratio1_mu_jags, 4),
round(res_ratios$n500$ratio1_mu_jags, 4),
round(res_ratios$n1000$ratio1_mu_jags, 4),
round(res_ratios$n10000$ratio1_mu_jags, 4)))
colnames(result_ratio1_jags) <- c(paste0("beta0", 1:4), paste0("mu", 1:4))
rownames(result_ratio1_jags) <- paste0( c(50, 100, 500, 1000, 10000))
result_ratio2_jags <- cbind(rbind(round(sqrt(res_ratios$n50$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n100$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n500$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n1000$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n10000$ratio2_beta0_jags), 4)),
rbind(round(sqrt(res_ratios$n50$ratio2_mu_jags), 4),
round(sqrt(res_ratios$n100$ratio2_mu_jags), 4),
round(sqrt(res_ratios$n500$ratio2_mu_jags), 4),
round(sqrt(res_ratios$n1000$ratio2_mu_jags), 4),
round(sqrt(res_ratios$n10000$ratio2_mu_jags), 4)))
colnames(result_ratio2_jags) <- c(paste0("beta0", 1:4), paste0("mu", 1:4))
rownames(result_ratio2_jags) <- paste0( c(50, 100, 500, 1000, 10000))
cat(sprintf(paste0("### ratio1-dirinla", "\n")))
as.data.frame(result_ratio1)
cat(sprintf(paste0("### ratio1-RJAGS", "\n")))
as.data.frame(result_ratio1_jags)
cat(sprintf(paste0("### ratio2-dirinla", "\n")))
as.data.frame(result_ratio2)
cat(sprintf(paste0("### ratio2-RJAGS", "\n")))
as.data.frame(result_ratio2_jags)
The second setting is based on a Dirichlet regression with a different covariate per category: \[\begin{align} {\boldsymbol{Y}}_n & \sim \text{Dirichlet}(\alpha_{1n}, \ldots, \alpha_{4n}) \,, i = 1, \ldots, N, \nonumber \\ \log(\alpha_{1n}) & = \beta_{01} + \beta_{11} v_{1n}, \nonumber \\ \log(\alpha_{2n}) & = \beta_{02} + \beta_{12} v_{2n}, \nonumber \\ \log(\alpha_{3n}) & = \beta_{03} + \beta_{13} v_{3n}, \\ \log(\alpha_{4n}) & = \beta_{04} + \beta_{14} v_{4n}. \nonumber \end{align}\] {Again, we simulated five different datasets of sizes \(N= 50, 100\), \(500, 1000, 10000\). We set values for \(\beta_{0c}\) and \(\beta_{1c}\) for \(c = 1, \ldots, 4\) to \(-1.5, 1, -3, 1.5, 2, -3 , -1, 5\) respectively, and we simulated covariates from a Uniform distribution with mean in the interval \((0,1)\). We assigned vague prior distributions for the {latent field ({\(\beta_{0c}, \beta_{1c}, c = 1,\ldots,4\)})} \(p(x_n) \sim\) \(\mathcal{N}(0, \tau = 0.0001)\). As the data generated did not present zeros and ones, we did not use any transformation.}
results1 <- readRDS(file = "simulation2/simulation2_50-500.RDS")
results2 <- readRDS(file = "simulation2/simulation2_1000-10000.RDS")
results <- c(results1, results2)
res_ratios <- readRDS("simulation2/simulation2_ratios_jags.RDS")
#Computational times
result_time <- rbind(results$n50$times,
results$n100$times,
results$n500$times,
results$n1000$times,
results$n10000$times)
colnames(result_time) <- c("R-JAGS", "dirinla", "long R-JAGS")
rownames(result_time) <- paste0( c(50, 100, 500, 1000, 10000))
as.data.frame(result_time)
N <- c(50, 100, 500, 1000, 10000)
for(i in N){
cat("\n")
cat(sprintf(paste0("### N = ", i, "\n")))
cat(paste0("{width=100%}"))
cat("\n")
}
results1 <- readRDS(file = "simulation2/simulation2_50-500.RDS")
results2 <- readRDS(file = "simulation2/simulation2_1000-10000.RDS")
results <- c(results1, results2)
res_ratios <- readRDS("simulation2/simulation2_ratios_jags.RDS")
### ratios dirinla
result_ratio1 <- cbind(rbind(round(results$n50$ratio1_intercepts, 4),
round(results$n100$ratio1_intercepts, 4),
round(results$n500$ratio1_intercepts, 4),
round(results$n1000$ratio1_intercepts, 4),
round(results$n10000$ratio1_intercepts, 4)),
rbind(round(results$n50$ratio1_slopes, 4),
round(results$n100$ratio1_slopes, 4),
round(results$n500$ratio1_slopes, 4),
round(results$n1000$ratio1_slopes, 4),
round(results$n10000$ratio1_slopes, 4)))
colnames(result_ratio1) <- c(paste0("beta0", 1:4), paste0("beta1", 1:4))
rownames(result_ratio1) <- paste0( c(50, 100, 500, 1000, 10000))
result_ratio2 <- cbind(rbind(round(sqrt(results$n50$ratio2_intercepts), 4),
round(sqrt(results$n100$ratio2_intercepts), 4),
round(sqrt(results$n500$ratio2_intercepts), 4),
round(sqrt(results$n1000$ratio2_intercepts), 4),
round(sqrt(results$n10000$ratio2_intercepts), 4)),
rbind(round(sqrt(results$n50$ratio2_slopes), 4),
round(sqrt(results$n100$ratio2_slopes), 4),
round(sqrt(results$n500$ratio2_slopes), 4),
round(sqrt(results$n1000$ratio2_slopes), 4),
round(sqrt(results$n10000$ratio2_slopes), 4)))
colnames(result_ratio2) <- c(paste0("beta0", 1:4), paste0("beta1", 1:4))
rownames(result_ratio2) <- paste0( c(50, 100, 500, 1000, 10000))
### ratios short jags
result_ratio1_jags <- cbind(rbind(round(res_ratios$n50$ratio1_beta0_jags, 4),
round(res_ratios$n100$ratio1_beta0_jags, 4),
round(res_ratios$n500$ratio1_beta0_jags, 4),
round(res_ratios$n1000$ratio1_beta0_jags, 4),
round(res_ratios$n10000$ratio1_beta0_jags, 4)),
rbind(round(res_ratios$n50$ratio1_beta1_jags, 4),
round(res_ratios$n100$ratio1_beta1_jags, 4),
round(res_ratios$n500$ratio1_beta1_jags, 4),
round(res_ratios$n1000$ratio1_beta1_jags, 4),
round(res_ratios$n10000$ratio1_beta1_jags, 4)))
colnames(result_ratio1_jags) <- c(paste0("beta0", 1:4), paste0("beta1", 1:4))
rownames(result_ratio1_jags) <- paste0( c(50, 100, 500, 1000, 10000))
result_ratio2_jags <- cbind(rbind(round(sqrt(res_ratios$n50$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n100$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n500$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n1000$ratio2_beta0_jags), 4),
round(sqrt(res_ratios$n10000$ratio2_beta0_jags), 4)),
rbind(round(sqrt(res_ratios$n50$ratio2_beta1_jags), 4),
round(sqrt(res_ratios$n100$ratio2_beta1_jags), 4),
round(sqrt(res_ratios$n500$ratio2_beta1_jags), 4),
round(sqrt(res_ratios$n1000$ratio2_beta1_jags), 4),
round(sqrt(res_ratios$n10000$ratio2_beta1_jags), 4)))
colnames(result_ratio2_jags) <- c(paste0("beta0", 1:4), paste0("beta1", 1:4))
rownames(result_ratio2_jags) <- paste0( c(50, 100, 500, 1000, 10000))
cat(sprintf(paste0("### ratio1-dirinla", "\n")))
as.data.frame(result_ratio1)
cat(sprintf(paste0("### ratio1-RJAGS", "\n")))
as.data.frame(result_ratio1_jags)
cat(sprintf(paste0("### ratio2-dirinla", "\n")))
as.data.frame(result_ratio2)
cat(sprintf(paste0("### ratio2-RJAGS", "\n")))
as.data.frame(result_ratio2_jags)
To conduct the tests, we have simulated different datasets with the following structures. Each dataset has 4 covariates, one per category and a shared random effect. \[\begin{eqnarray} \log(\alpha_{1n}) & = & \eta_{1n} = \beta_{11} \cdot v_{1n} + w^1(j_n) \,, \nonumber \\ \log(\alpha_{2n}) & = & \eta_{2n} = \beta_{12} \cdot v_{2n} + w^1(j_n) \,, \nonumber \\ \log(\alpha_{3n}) & = & \eta_{3n} = \beta_{13} \cdot v_{3n} + w^2(j_n) \,, \nonumber \\ \log(\alpha_{4n}) & = & \eta_{4n} = \beta_{14} \cdot v_{4n} + w^2(j_n) \,, \nonumber \\ \end{eqnarray}\] where \(v_{kn}\) are covariates \(k = 1, \ldots, 4\) simulated from a random uniform (-1, 1), and \(w^1(j_n)\) and \(w^2(j_n)\) are iid shared random effects. \(j_n = 1, \ldots, J\), being \(J\) the levels of the factor.
We simulate datasets with N= 50, 100 and 500; and with different sizes for J. We have fitted the models using Gaussian priors for \(\beta\)s, Half Normal and pc-priors for \(\sigma_1\) and \(\sigma_2\). We are going to fit four different models:
Short-JAGS with Half Normal for standard deviations (mean = 0, sd = 1).
dirinla with pc-priors for standard deviations (sigma = 10, alpha = 0.01).
Long-JAGS with Half Normal for standard deviations (mean = 0, sd = 1).
dirinla with Half Normal for standard deviations (mean = 0, sd = 1).
When J = 50, 100, 500 and 1000, only the corresponding model J=N is fitted.
a <- readRDS(file = "simulation4/simulation4_50-500.RDS")
b <- readRDS(file = "simulation4/simulation4_1000.RDS")
results <- cbind(a,b)
res_ratios <- readRDS("simulation4/simulation4_ratios_jags.RDS")
res_times <- function(j){
N <- c(50, 100, 500, 1000)
if(j> 40){
n_levels_paper <- paste0(N[N==j], "-", N[N==j])
}else{
n_levels_paper <- paste0(N, "-", j)
}
#Computational times levels_factor = 2
times <- n_levels_paper %>% results["times",.] %>%
do.call(rbind, .)
colnames(times) <- c("R-JAGS", "dirinla-pc", "long R-JAGS", "dirinla-hn")
cat(sprintf(paste0("### J = ", j, "{.tabset} \n")))
as.data.frame(times)
}
#c(2,5,10, 25, 50, 100, 500, 1000)
i <- 2
res_times(j = i)
i <- 5
res_times(j = i)
i <- 10
res_times(j = i)
i <- 25
res_times(j = i)
i <- 50
res_times(j = i)
i <- 100
res_times(j = i)
i <- 500
res_times(j = i)
i <- 1000
res_times(j = i)
summary_print <- function(N, J)
{
cat("\n")
cat(sprintf(paste0("#### J = ", J, "\n")))
cat("**Parameters** \n")
cat(paste0("{width=100%}"))
cat("**Hyperparameters** \n ")
cat(paste0("{width=100%} \n \n "))
cat("<br>")
}
summary_print(N = 50, J = 2)
Parameters
Hyperparameters
summary_print(N = 50, J = 5)
Parameters
Hyperparameters
summary_print(N = 50, J = 10)
Parameters
Hyperparameters
summary_print(N = 50, J = 25)
Parameters
Hyperparameters
summary_print(N = 50, J = 50)
Parameters
Hyperparameters
summary_print(N = 100, J = 2)
Parameters
Hyperparameters
summary_print(N = 100, J = 5)
Parameters
Hyperparameters
summary_print(N = 100, J = 10)
Parameters
Hyperparameters
summary_print(N = 100, J = 25)
Parameters
Hyperparameters
summary_print(N = 100, J = 100)
Parameters
Hyperparameters
summary_print(N = 500, J = 2)
Parameters
Hyperparameters
summary_print(N = 500, J = 5)
Parameters
Hyperparameters
summary_print(N = 500, J = 10)
Parameters
Hyperparameters
summary_print(N = 500, J = 25)
Parameters
Hyperparameters
summary_print(N = 500, J = 500)
Parameters
Hyperparameters
summary_print(N = 1000, J = 2)
Parameters
Hyperparameters
summary_print(N = 1000, J = 5)
Parameters
Hyperparameters
summary_print(N = 1000, J = 10)
Parameters
Hyperparameters
summary_print(N = 1000, J = 25)
Parameters
Hyperparameters
summary_print(N = 1000, J = 1000)
Parameters
Hyperparameters
When J = 50, 100, 500 and 1000, only the corresponding model J=N is fitted.
a <- readRDS(file = "simulation4/simulation4_50-500.RDS")
b <- readRDS(file = "simulation4/simulation4_1000.RDS")
results <- cbind(a,b)
res_ratios <- readRDS("simulation4/simulation4_ratios_jags.RDS")
res_ratio1 <- function(j){
N <- c(50, 100, 500, 1000)
if(j> 40){
n_levels_paper <- paste0(N[N==j], "-", N[N==j])
}else{
n_levels_paper <- paste0(N, "-", j)
}
#Computational times levels_factor = 2
times <- n_levels_paper %>% results["times",.] %>%
do.call(rbind, .)
# DIRINLA:ratio1_beta1 and sigma
ratio1_beta1_hn <- n_levels_paper %>% results["ratio1_beta1_hn",.] %>%
do.call(rbind, .)
ratio1_sigma1_hn <- n_levels_paper %>% results["ratio1_sigma_hn",.] %>%
do.call(rbind, .)
ratio1_paper <- cbind(ratio1_beta1_hn, ratio1_sigma1_hn) %>% round(., 4)
colnames(ratio1_paper)[1:4] <- c(paste0("beta1", 1:4))
cat(sprintf(paste0("### J = ", j, "{.tabset} \n")))
cat(sprintf(paste0("#### ratio1-dirinla", "\n")))
as.data.frame(ratio1_paper)
}
res_ratio2 <- function(j){
N <- c(50, 100, 500, 1000)
if(j> 40){
n_levels_paper <- paste0(N[N==j], "-", N[N==j])
}else{
n_levels_paper <- paste0(N, "-", j)
}
# DIRINLA:ratio2_beta1 and sigma
ratio2_beta1_hn <- n_levels_paper %>% results["ratio2_beta1_hn",.] %>%
do.call(rbind, .) %>% sqrt(.)
ratio2_sigma1_hn <- n_levels_paper %>% results["ratio2_sigma_hn",.] %>%
do.call(rbind, .) %>% sqrt(.)
ratio2_paper <- cbind(ratio2_beta1_hn, ratio2_sigma1_hn) %>% round(., 4)
colnames(ratio2_paper)[1:4] <- c(paste0("beta1", 1:4))
cat(sprintf(paste0("#### ratio2-dirinla", "\n")))
as.data.frame(ratio2_paper)
}
res_ratio1_jags <- function(j){
N <- c(50, 100, 500, 1000)
if(j> 40){
n_levels_paper <- paste0(N[N==j], "-", N[N==j])
}else{
n_levels_paper <- paste0(N, "-", j)
}
# JAGS:ratio1_beta1 and sigma
ratio1_beta1_hn_jags <- n_levels_paper %>% res_ratios["ratio1_beta1_hn_jags",.] %>%
do.call(rbind, .)
ratio1_sigma1_hn_jags <- n_levels_paper %>% res_ratios["ratio1_sigma_hn_jags",.] %>%
do.call(rbind, .)
ratio1_paper_jags <- cbind(ratio1_beta1_hn_jags, ratio1_sigma1_hn_jags) %>% round(., 4)
colnames(ratio1_paper_jags) <- c(paste0("beta1", 1:4), paste0("sigma", 1:2))
cat(sprintf(paste0("#### ratio1-RJAGS", "\n")))
as.data.frame(ratio1_paper_jags)
}
res_ratio2_jags <- function(j){
N <- c(50, 100, 500, 1000)
if(j> 40){
n_levels_paper <- paste0(N[N==j], "-", N[N==j])
}else{
n_levels_paper <- paste0(N, "-", j)
}
# JAGS:ratio2_beta1 and sigma
ratio2_beta1_hn_jags <- n_levels_paper %>% res_ratios["ratio2_beta1_hn_jags",.] %>%
do.call(rbind, .) %>% sqrt(.)
ratio2_sigma1_hn_jags <- n_levels_paper %>% res_ratios["ratio2_sigma_hn_jags",.] %>%
do.call(rbind, .) %>% sqrt(.)
ratio2_paper_jags <- cbind(ratio2_beta1_hn_jags, ratio2_sigma1_hn_jags) %>% round(., 4)
colnames(ratio2_paper_jags) <- c(paste0("beta1", 1:4), paste0("sigma", 1:2))
cat(sprintf(paste0("#### ratio2-RJAGS", "\n")))
as.data.frame(ratio2_paper_jags)
}
#c(2,5,10, 25, 50, 100, 500, 1000)
i <- 2
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)
i <- 5
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)
i <- 10
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)
i <- 25
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)
i <- 50
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)
i <- 100
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)
i <- 500
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)
i <- 1000
res_ratio1(j = i)
res_ratio1_jags(j = i)
res_ratio2(j=i)
res_ratio2_jags(j=i)